
				********************************************************************
				* 		Enhanced-Home Food Production Program in Burkina Faso 	   *
				* 																   *
				*		By: Malek Abu-Jawdeh & Lilia Bliznashka 				   *
				*                                         	 			           *      
				* Edited: LB, October 3, 2013 				      			       *
				* Edited: MAJ, October 18, 2013 							       *
				* Edited: LH, nov 10, 2013 									       *
				* STATA Version 13												   *
				********************************************************************
* Start from a clean session: empty memory, no output paging, and close any
* log left open by an earlier run (capture ignores the error if none is open).
clear all
set more off
capture log close
/*
*** define globals ***

	di "`c(pwd)'"

	global dir=subinstr(`"`c(pwd)'"',"\do","",.)

	global data2010 `"$dir/data/Round 1 2010"'
	global data2012 `"$dir/data/Round 2 2012"'
	global data2013 `"$dir/data/Round 3 2013"'
	global datam `"$dir/data/Merged"'
	global log `"$dir/log"'
	global out `"$dir/out"'
	global do `"$dir/do"'
*/
* Load the round-1 section 24 file (one row per child; see isid check below) *

	use "$data2010/R1_Section_24.dta", clear

* Bring in date1ere (the interview date) from the household cover page, *
* section 0. Many children can share one household row, hence m:1.     *

	merge m:1 hhid using "$data2010/R1_Section_0.dta", keepusing(date1ere)

	* keep only rows found in both files; one child has no section 0 record
	keep if _merge == 3
	drop _merge
	
* Child ID corrections (LH nov 10): within the listed household, a child
* recorded under the old idp_child value is moved to the corrected value.
  replace idp_child = 4 if hhid == 10401 & idp_child == 1
  replace idp_child = 2 if hhid == 10522 & idp_child == 3
  replace idp_child = 6 if hhid == 10541 & idp_child == 3
  replace idp_child = 5 if hhid == 10612 & idp_child == 6
  replace idp_child = 6 if hhid == 11509 & idp_child == 4
  replace idp_child = 3 if hhid == 11639 & idp_child == 4
  replace idp_child = 9 if hhid == 11807 & idp_child == 1
  replace idp_child = 3 if hhid == 20460 & idp_child == 1
  replace idp_child = 3 if hhid == 30346 & idp_child == 6
  
* these children do not have anthropometry data (derived from s17)  
/*    +-----------------------------+
     |  hhid   idp_mo~r   idp_ch~d |
     |-----------------------------|
  1. | 10347          2          6 |
  2. | 10508          2          8 |
  3. | 10544          2          3 |
  4. | 10552          3          9 |
  5. | 10946          2          5 |
     |-----------------------------|
  6. | 20601          2          3 |
  7. | 30106          2          3 |
  8. | 40102          1          7 |
  9. | 40109          3         12 |
 10. | 40302          2          3 |
     |-----------------------------|
 11. | 40620          2          3 |
     +-----------------------------+ */


	
* Give the cover-page date a descriptive name and label *
	rename date1ere intdate
	label variable intdate "Interview date"

* (hhid, idp_child) must uniquely identify every row; isid aborts the run if not *
	isid hhid idp_child
		* author's check: ok *

* Report observations that are duplicated on all variables *
	duplicates report
		* author's check: no duplicates *

* cleaning vars *
	
*** interview date ***

	tab intdate, m nol

		* Split the date into its parts so that mistyped years can be corrected *
			gen intdate_year  = year(intdate)
			gen intdate_month = month(intdate)
			gen intdate_day   = day(intdate)

			tab intdate_year
			* the four miscoded years seen in the data are all set to 2010
			replace intdate_year = 2010 if inlist(intdate_year, 1009, 2001, 2009, 2201)

		* Rebuild the date from the corrected parts *
		replace intdate = mdy(intdate_month, intdate_day, intdate_year)
		format intdate %td

		tab intdate
			* author's check: ok *
				
*** date of birth *** 
* Builds birthdate from day (s24_q3a), month (s24_q3b) and year (s24_q3c).
* Known day-31-in-a-30-day-month errors are capped at 30, and a missing day
* is imputed with a random valid day (flagged in date_born_rand).

	tab1 s24_q3a s24_q3b s24_q3c, m nol
		* 14 missing days, 2 missing months, 2 missing year * 

		* Fix the random-number seed so the imputed birth days below are
		* identical every time this do-file is run.
		set seed 20131110
				
		* create DOB * 
			* verify DOB will be correct: count days beyond 30 in 30-day months * 
			count if s24_q3a>30 & !missing(s24_q3a) & inlist(s24_q3b, 4, 6, 9, 11)
				* 5 contradictions *
				*br if s24_q3a>30 & (s24_q3b==4 | s24_q3b==6 | s24_q3b==9 | s24_q3b==11)
				
				* Cap the day at 30 only on the offending rows. The extra
				* condition keeps a sibling born in the same month with a valid
				* or missing day from being overwritten (missing counts as >30
				* in Stata, hence the explicit !missing()).
				replace s24_q3a=30 if s24_q3b==6  & hhid==10539 & s24_q3a>30 & !missing(s24_q3a)
				replace s24_q3a=30 if s24_q3b==9  & hhid==12128 & s24_q3a>30 & !missing(s24_q3a)
				replace s24_q3a=30 if s24_q3b==9  & hhid==20901 & s24_q3a>30 & !missing(s24_q3a)
				replace s24_q3a=30 if s24_q3b==11 & hhid==40706 & s24_q3a>30 & !missing(s24_q3a)
				replace s24_q3a=30 if s24_q3b==11 & hhid==40722 & s24_q3a>30 & !missing(s24_q3a)
				
			* after the corrections every recorded day must fit its month
			* (February is checked against 28 days; leap years are not considered)
			assert s24_q3a<=30 if !missing(s24_q3a) & inlist(s24_q3b, 4, 6, 9, 11)
			assert s24_q3a<=31 if !missing(s24_q3a) & inlist(s24_q3b, 1, 3, 5, 7, 8, 10, 12)
			assert s24_q3a<=28 if !missing(s24_q3a) & s24_q3b==2
		
		gen birthdate = mdy(s24_q3b, s24_q3a, s24_q3c)
		format birthdate %td
			* 12 with missing days. ok * 
		
		lab var birthdate "Date of birth" 
		
		
		* flag children whose birth day is missing and will be imputed below * 
		gen date_born_rand=1 if s24_q3a==. 
		replace date_born_rand=0 if s24_q3a~=. 
		tab date_born_rand
			* ok *
		
		lab var date_born_rand "Child birth date randomly created"
		label values date_born_rand yesno

		* If birth date and interview date have the same month and year, and birth day is missing, generate a birth day between 1 and the interview day * 
		* Can end up with age in days = 0 if birth date and measurement date are equal, which could happen even though it's unlikely * 
		* floor(runiform()*k)+1 draws each integer 1..k with equal probability and never exceeds k, *
		* so the imputed birth date cannot fall after the interview date *

		gen s24_q3a_new=floor(runiform()*intdate_day)+1 if (s24_q3b==intdate_month & intdate_year==s24_q3c & s24_q3a==.)
		tab s24_q3a_new, m
		replace s24_q3a=s24_q3a_new if (s24_q3b==intdate_month & intdate_year==s24_q3c & s24_q3a==.)

		* If birth month/year differ from the interview month/year, replace the missing birth day with a random integer within the length of the month *
		* (February is always treated as having 28 days) *
		replace s24_q3a= floor(runiform()*30)+1 if s24_q3a==. & inlist(s24_q3b, 4, 6, 9, 11) 
		replace s24_q3a= floor(runiform()*31)+1 if s24_q3a==. & inlist(s24_q3b, 1, 3, 5, 7, 8, 10, 12) 
		replace s24_q3a= floor(runiform()*28)+1 if s24_q3a==. & s24_q3b==2

		tab s24_q3a, m
		
		
		* update DOB now that missing days are filled in *
		replace birthdate = mdy(s24_q3b, s24_q3a, s24_q3c)
		format birthdate %td

		tab birthdate, m
			* 2 are missing day, month and year * 
		

*** child height *** 

	tab s24_q7, m nol
	* anything outside 20-120 is treated as not measured
	* (presumably centimetres; confirm against the questionnaire)
	replace s24_q7 = . if !inrange(s24_q7, 20, 120)

*** child weight *** 

	* s24_q5 readings below 35 are discarded, then child weight is taken as
	* s24_q5 minus s24_q6
	* NOTE(review): presumably s24_q5 is mother and child together and s24_q6
	* the mother alone; confirm against the questionnaire
	replace s24_q5 = . if s24_q5 < 35
	gen ch_weight = s24_q5 - s24_q6
	label variable ch_weight "Child's weight (in kg)"
	tab ch_weight, m
	
*** create age vars ***

	* age in days is the gap between interview and birth dates
	gen ch_ageda = intdate - birthdate
	label variable ch_ageda "Child age in days"
	tab ch_ageda, m

	* age in months uses an average month length of 365.25/12 days
	gen ch_agemo = ch_ageda/(365.25/12)
	label variable ch_agemo "Child age in months (aged/30.437)"
	tab ch_agemo, m


	* Children with a non-positive age are not dropped (LH, nov10); their age
	* in months is imputed from the roster instead.
	* NOTE(review): ch_ageda is left unchanged for these children, so it no
	* longer agrees with ch_agemo; confirm this is intended.
	list hhid idp_mother idp_child ch_agemo birthdate intdate if ch_agemo <= 0
	replace ch_agemo = 12 if hhid == 10347 & idp_child == 6
	replace ch_agemo = 6  if hhid == 10508 & idp_child == 8
	replace ch_agemo = 5  if hhid == 10544 & idp_child == 3
	replace ch_agemo = 6  if hhid == 10552 & idp_child == 9
	replace ch_agemo = 3  if hhid == 10946 & idp_child == 5
	replace ch_agemo = 6  if hhid == 20601 & idp_child == 3
	replace ch_agemo = 6  if hhid == 30106 & idp_child == 3
	replace ch_agemo = 7  if hhid == 40102 & idp_child == 7
	replace ch_agemo = 8  if hhid == 40109 & idp_child == 12
	replace ch_agemo = 11 if hhid == 40302 & idp_child == 3


	* Children with no computable age: age in months from r1_s1 - roster, LH 25/11
	list hhid idp_mother idp_child birthdate intdate if ch_agemo == .
	replace ch_agemo = 11 if hhid == 11831 & idp_child == 5
	replace ch_agemo = 7  if hhid == 12123 & idp_child == 3
	replace ch_agemo = 8  if hhid == 10912 & idp_child == 2
	* author's check: zero missing ages
	count if missing(ch_agemo)
	
	
*** rename vars to convention ****

	* Nov 21, 2013 LB: some child sex entries are wrong. The section-24 sex
	* variable and the child ID are temporarily given the names used in
	* R1_Section_1 (s1_q2, idp) so that merge can overwrite the values here
	* with the ones from that file.
	rename (s24_q4 idp_child) (s1_q2 idp)
	sort hhid idp
	count

	* update replace: values from the using file win wherever the two differ
	merge 1:1 hhid idp using "$data2010/R1_Section_1", keepusing(s1_q2) update replace
	* discard rows that exist only in R1_Section_1
	drop if _merge == 2
	drop _merge

	* switch to the final variable names
	rename (s1_q2 idp s24_q7) (ch_sex idp_child ch_height)


*** drop unnecessary vars ****
	drop s24_q3a s24_q3b s24_q3c s24_q3a_new ///
		s24_q5 s24_q6 s24_q8 ///
		intdate_year intdate_month intdate_day

	order hhid idp_mother idp_child intdate birthdate ch_sex ///
		ch_ageda ch_agemo ch_weight ch_height date_born_rand
	
	
*** creating 6, 3, and 1 month age groups ***

	* For each bin width w (in months) build ch_age{w}mo:
	*   - egen cut() with icodes numbers the bins [0,w), [w,2w), ... from 0,
	*     using break points 0(w)60; adding 1 makes the codes start at 1
	*   - the value label carries the same name as the variable and reads
	*     "lower to upper", where upper is the next break point minus 0.1
	* Text of the first label for each width:
	local first_6 "0 to 5.9"
	local first_3 "0 to 2.9"
	local first_1 "0 to 0.9"

	foreach w of numlist 6 3 1 {
		local grp ch_age`w'mo

		egen `grp' = cut(ch_agemo), at(0(`w')60) icodes
		replace `grp' = `grp' + 1

		label define `grp' 1 "`first_`w''"
		local counter = 2
		forvalues j = `w'(`w')60 {
			local upper = `j'+`w'-.1
			label define `grp' `counter' "`j' to `upper'", add
			local counter = `counter'+1
		}

		label values `grp' `grp'
		label variable `grp' "Age groups (`w' mo) index child"
	}
	


*** Create Zscores ****

	* zscore06 is not defined in this file (a user-written command that must
	* already be installed). It is given age in months, height, weight and
	* sex, with sex coded 0 = female and 1 = male, and leaves behind haz06,
	* waz06, whz06 and bmiz06.
	* NOTE(review): presumably WHO 2006 growth-standard z-scores; confirm.
	zscore06, a(ch_agemo) h(ch_height) w(ch_weight) s(ch_sex) female(0) male(1)

	* prefix the results with ch_ like the other child variables
	rename (haz06 waz06 whz06 bmiz06) (ch_haz06 ch_waz06 ch_whz06 ch_bmi06)
	

*** New Vars *** 
* Each indicator is 1 when the measure is below its cut-off, 0 when it is not,
* and missing when the underlying measure is missing.

*** anemic ***
	tab s24_q9, m

	* s24_q9 below 11 (presumably haemoglobin in g/dL; confirm)
	gen anemic = (s24_q9 < 11) if !missing(s24_q9)
	label variable anemic "Child is anemic"

	* s24_q9 below 7
	gen anemic_severe = (s24_q9 < 7) if !missing(s24_q9)
	label variable anemic_severe "Child is severely anemic"

*** stunting *** 
	tab ch_haz06, m

	* NOTE(review): confirm how zscore06 codes z-scores it cannot compute. If
	* it stores a numeric sentinel rather than missing, those children are
	* classified as 0 by the three indicators below.
	gen stunted = (ch_haz06 < -2) if !missing(ch_haz06)
	label variable stunted "Child is stunted"

*** underweight ***
	tab ch_waz06, m

	gen underweight = (ch_waz06 < -2) if !missing(ch_waz06)
	label variable underweight "Child is underweight"

*** wasting *** 
	tab ch_whz06, m

	gen wasting = (ch_whz06 < -2) if !missing(ch_whz06)
	label variable wasting "Child is wasting"

exit
